TASK 1 and TASK 2

In these tasks, we are asked to choose 5 bookmakers and apply PCA to their odds data.

For example, for Pinnacle, the first component alone captures about 91% of the total variance, and the first two components together capture about 99%.

Also, the away-win odd (odd2) has by far the largest loading on the first component, so it is the feature that accounts for most of the variability.

## Warning: package 'dplyr' was built under R version 3.4.4
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## Loading required package: data.table
## Warning: package 'data.table' was built under R version 3.4.4
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## Importance of components:
##                           Comp.1     Comp.2      Comp.3      Comp.4
## Standard deviation     0.1825755 0.05393369 0.012518614 0.006318502
## Proportion of Variance 0.9139673 0.07975649 0.004296931 0.001094647
## Cumulative Proportion  0.9139673 0.99372380 0.998020727 0.999115374
##                              Comp.5       Comp.6       Comp.7
## Standard deviation     0.0055127591 1.365384e-03 9.428989e-05
## Proportion of Variance 0.0008332664 5.111586e-05 2.437677e-07
## Cumulative Proportion  0.9999486404 9.999998e-01 1.000000e+00
## 
## Loadings:
##                    Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7
## odd1_Pinnacle_NA    0.194  0.916  0.240  0.164 -0.192              
## odd2_Pinnacle_NA   -0.957         0.166  0.132 -0.173              
## oddX_Pinnacle_NA   -0.214  0.371 -0.387 -0.422  0.690  0.113       
## over_Pinnacle_0.5                        0.108               -0.992
## over_Pinnacle_2.5                 0.668 -0.214  0.390 -0.590       
## under_Pinnacle_0.5                      -0.848 -0.506        -0.127
## under_Pinnacle_2.5               -0.559        -0.214 -0.796       
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.143  0.143  0.143  0.143  0.143  0.143  0.143
## Cumulative Var  0.143  0.286  0.429  0.571  0.714  0.857  1.000

## Importance of components:
##                           Comp.1     Comp.2     Comp.3      Comp.4
## Standard deviation     0.1390548 0.08641774 0.04472684 0.009121567
## Proportion of Variance 0.6681044 0.25803472 0.06912080 0.002874824
## Cumulative Proportion  0.6681044 0.92613914 0.99525994 0.998134766
##                             Comp.5       Comp.6       Comp.7       Comp.8
## Standard deviation     0.006217179 0.0028650498 0.0023409420 1.222277e-03
## Proportion of Variance 0.001335547 0.0002836199 0.0001893449 5.161925e-05
## Cumulative Proportion  0.999470313 0.9997539325 0.9999432774 9.999949e-01
##                              Comp.9
## Standard deviation     3.843178e-04
## Proportion of Variance 5.103326e-06
## Cumulative Proportion  1.000000e+00
## 
## Loadings:
##                  Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## NO_Betway_NA                    0.109 -0.300 -0.462 -0.417  0.359  0.616
## YES_Betway_NA                  -0.103  0.293  0.391  0.195 -0.325  0.778
## odd1_Betway_NA   -0.234  0.505 -0.772  0.196 -0.231                     
## odd2_Betway_NA    0.799 -0.373 -0.368  0.186 -0.224                     
## oddX_Betway_NA    0.208  0.103 -0.337 -0.701  0.552         0.190       
## over_Betway_0.5                                     -0.144              
## over_Betway_2.5                        0.307  0.414 -0.834        -0.108
## under_Betway_0.5  0.507  0.763  0.362  0.153                            
## under_Betway_2.5                      -0.378 -0.217 -0.261 -0.850       
##                  Comp.9
## NO_Betway_NA           
## YES_Betway_NA          
## odd1_Betway_NA         
## odd2_Betway_NA         
## oddX_Betway_NA         
## over_Betway_0.5  -0.986
## over_Betway_2.5   0.135
## under_Betway_0.5       
## under_Betway_2.5       
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.111  0.111  0.111  0.111  0.111  0.111  0.111  0.111
## Cumulative Var  0.111  0.222  0.333  0.444  0.556  0.667  0.778  0.889
##                Comp.9
## SS loadings     1.000
## Proportion Var  0.111
## Cumulative Var  1.000

## Importance of components:
##                           Comp.1    Comp.2     Comp.3      Comp.4
## Standard deviation     0.1823609 0.1021055 0.04409562 0.007483578
## Proportion of Variance 0.7272582 0.2279942 0.04252224 0.001224740
## Cumulative Proportion  0.7272582 0.9552524 0.99777462 0.998999359
##                              Comp.5       Comp.6       Comp.7       Comp.8
## Standard deviation     0.0056889942 0.0028326541 1.753611e-03 1.494435e-03
## Proportion of Variance 0.0007077767 0.0001754738 6.724989e-05 4.884043e-05
## Cumulative Proportion  0.9997071360 0.9998826098 9.999499e-01 9.999987e-01
##                              Comp.9
## Standard deviation     2.438038e-04
## Proportion of Variance 1.299889e-06
## Cumulative Proportion  1.000000e+00
## 
## Loadings:
##                  Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## NO_Unibet_NA                    0.115 -0.349 -0.358 -0.492  0.380  0.591
## YES_Unibet_NA                  -0.104  0.348  0.262  0.197 -0.373  0.788
## odd1_Unibet_NA           0.576 -0.761        -0.272                     
## odd2_Unibet_NA    0.608 -0.613 -0.419        -0.266                     
## oddX_Unibet_NA    0.208        -0.318 -0.489  0.749         0.225       
## over_Unibet_0.5                                                         
## over_Unibet_2.5                        0.574  0.309 -0.732        -0.136
## under_Unibet_0.5  0.759  0.535  0.343  0.136                            
## under_Unibet_2.5                      -0.394        -0.412 -0.811       
##                  Comp.9
## NO_Unibet_NA           
## YES_Unibet_NA          
## odd1_Unibet_NA         
## odd2_Unibet_NA         
## oddX_Unibet_NA         
## over_Unibet_0.5  -0.995
## over_Unibet_2.5        
## under_Unibet_0.5       
## under_Unibet_2.5       
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.111  0.111  0.111  0.111  0.111  0.111  0.111  0.111
## Cumulative Var  0.111  0.222  0.333  0.444  0.556  0.667  0.778  0.889
##                Comp.9
## SS loadings     1.000
## Proportion Var  0.111
## Cumulative Var  1.000

## Importance of components:
##                           Comp.1    Comp.2     Comp.3      Comp.4
## Standard deviation     0.1515605 0.0742054 0.03799709 0.007034968
## Proportion of Variance 0.7652335 0.1834395 0.04809748 0.001648717
## Cumulative Proportion  0.7652335 0.9486730 0.99677046 0.998419173
##                             Comp.5       Comp.6       Comp.7       Comp.8
## Standard deviation     0.005917478 0.0026733107 0.0018943852 0.0012914873
## Proportion of Variance 0.001166528 0.0002380788 0.0001195524 0.0000555651
## Cumulative Proportion  0.999585701 0.9998237797 0.9999433321 0.9999988972
##                              Comp.9
## Standard deviation     1.819412e-04
## Proportion of Variance 1.102768e-06
## Cumulative Proportion  1.000000e+00
## 
## Loadings:
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## NO_bwin_NA                   -0.148  0.299 -0.390 -0.564  0.280  0.583
## YES_bwin_NA                   0.138 -0.216  0.302  0.358 -0.240  0.810
## odd1_bwin_NA    0.214 -0.662  0.652  0.291                            
## odd2_bwin_NA   -0.841  0.243  0.390  0.281                            
## oddX_bwin_NA   -0.244 -0.232  0.228 -0.799 -0.396 -0.139  0.139       
## over_bwin_0.5                                                         
## over_bwin_2.5                       -0.230  0.625 -0.708 -0.174       
## under_bwin_0.5 -0.427 -0.657 -0.565         0.246                     
## under_bwin_2.5                             -0.376 -0.147 -0.898       
##                Comp.9
## NO_bwin_NA           
## YES_bwin_NA          
## odd1_bwin_NA         
## odd2_bwin_NA         
## oddX_bwin_NA         
## over_bwin_0.5  -0.992
## over_bwin_2.5   0.109
## under_bwin_0.5       
## under_bwin_2.5       
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.111  0.111  0.111  0.111  0.111  0.111  0.111  0.111
## Cumulative Var  0.111  0.222  0.333  0.444  0.556  0.667  0.778  0.889
##                Comp.9
## SS loadings     1.000
## Proportion Var  0.111
## Cumulative Var  1.000

## Importance of components:
##                           Comp.1    Comp.2     Comp.3      Comp.4
## Standard deviation     0.2032587 0.1386602 0.06286807 0.009788170
## Proportion of Variance 0.6390488 0.2973987 0.06113583 0.001481967
## Cumulative Proportion  0.6390488 0.9364475 0.99758331 0.999065276
##                              Comp.5       Comp.6      Comp.7       Comp.8
## Standard deviation     0.0063465469 0.0034916697 0.002023776 1.789684e-03
## Proportion of Variance 0.0006230323 0.0001885827 0.000063352 4.954368e-05
## Cumulative Proportion  0.9996883080 0.9998768907 0.999940243 9.999898e-01
##                              Comp.9
## Standard deviation     8.125913e-04
## Proportion of Variance 1.021363e-05
## Cumulative Proportion  1.000000e+00
## 
## Loadings:
##                   Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## NO_Betsafe_NA                    0.124  0.452 -0.252 -0.519         0.666
## YES_Betsafe_NA                  -0.113 -0.420  0.103         0.816  0.355
## odd1_Betsafe_NA    0.111  0.565 -0.771        -0.267                     
## odd2_Betsafe_NA   -0.636 -0.586 -0.428        -0.257                     
## oddX_Betsafe_NA   -0.203        -0.287  0.159  0.884 -0.179 -0.136  0.128
## over_Betsafe_0.5                                     -0.106              
## over_Betsafe_2.5                       -0.633        -0.679 -0.325 -0.129
## under_Betsafe_0.5 -0.732  0.573  0.329 -0.152                            
## under_Betsafe_2.5                       0.406        -0.470  0.447 -0.626
##                   Comp.9
## NO_Betsafe_NA           
## YES_Betsafe_NA          
## odd1_Betsafe_NA         
## odd2_Betsafe_NA         
## oddX_Betsafe_NA         
## over_Betsafe_0.5   0.988
## over_Betsafe_2.5        
## under_Betsafe_0.5       
## under_Betsafe_2.5 -0.120
## 
##                Comp.1 Comp.2 Comp.3 Comp.4 Comp.5 Comp.6 Comp.7 Comp.8
## SS loadings     1.000  1.000  1.000  1.000  1.000  1.000  1.000  1.000
## Proportion Var  0.111  0.111  0.111  0.111  0.111  0.111  0.111  0.111
## Cumulative Var  0.111  0.222  0.333  0.444  0.556  0.667  0.778  0.889
##                Comp.9
## SS loadings     1.000
## Proportion Var  0.111
## Cumulative Var  1.000

Comments:

It is not easy to distinguish the over/under 2.5 outcomes by looking at the PCA results. On the other hand, the home/away/tie outcomes can be distinguished more easily in all 5 bookmakers’ PCA plots (the points form a V shape and the colors are not distributed homogeneously).

There is no significant difference between MDS with Euclidean distance and PCA results.

MDS with Manhattan distance performs better than MDS with Euclidean distance (the V shape is sharper).

TASK 3

Below are the image I chose, its RGB channels, its noisy version, and the channels of the noisy version.

## Importance of components:
##                           Comp.1     Comp.2     Comp.3
## Standard deviation     0.4530941 0.09680673 0.04993670
## Proportion of Variance 0.9453617 0.04315512 0.01148315
## Cumulative Proportion  0.9453617 0.98851685 1.00000000
## 
## Loadings:
##      Comp.1 Comp.2 Comp.3
## [1,] -0.572  0.707  0.415
## [2,] -0.587        -0.810
## [3,] -0.572 -0.707  0.415
## 
##                Comp.1 Comp.2 Comp.3
## SS loadings     1.000  1.000  1.000
## Proportion Var  0.333  0.333  0.333
## Cumulative Var  0.333  0.667  1.000
  1. I used the first, second and third components of PCA, one after the other, to reconstruct the image. The first component is the best at capturing the variance, as can be seen below.

  2. I plotted the loadings of the first, second and third principal components as 3-by-3 images.

APPENDIX

setwd(“C:/Users/dsa/Desktop/582 Dosyalar”)

library(jpeg) library(dplyr) require(data.table)

TASK1 and TASK2

# Load the match results and the bookmakers' odds history.
matches <- readRDS("df9b1196-e3cf-4cc7-9159-f236fe738215_matches.rds")
odd_details <- readRDS(
  "df9b1196-e3cf-4cc7-9159-f236fe738215_odd_details.rds"
)

# Remove duplicated match rows, then split the "home:away" score string
# into two character columns.
matches <- unique(matches)
matches[, c("HomeGoals", "AwayGoals") := tstrsplit(score, ":")]

# transform characters to numeric for scores
matches[, HomeGoals := as.numeric(HomeGoals)]
matches[, AwayGoals := as.numeric(AwayGoals)]

# calculate total goals
matches[, TotalGoals := HomeGoals + AwayGoals]

# mark over under (1 = more than 2 goals in total, 0 otherwise)
# NOTE: rows with a missing score keep the defaults set here and below,
# because a comparison against NA selects no rows; the per-bookmaker
# analysis later filters rows with complete.cases().
matches[, IsOver := 0]
matches[TotalGoals > 2, IsOver := 1]

# mark match outcome: 2 = tie (default), 1 = away win, 0 = home win
matches[, Result := 2]
matches[AwayGoals > HomeGoals, Result := 1]
matches[AwayGoals < HomeGoals, Result := 0]

# order data in ascending date
odds_ordered <- odd_details[order(matchId, oddtype, bookmaker, date)]

# take final odds: the last (latest) odd of every
# match / odd type / bookmaker / handicap combination
odds_ordered_final <- odds_ordered[
  ,
  list(final_odd = odd[.N]),
  by = list(matchId, oddtype, bookmaker, totalhandicap)
]

# PCA and MDS per bookmaker (Pinnacle, Betway, Unibet, bwin, Betsafe).
# The same pipeline is applied to every bookmaker, so it lives in one
# function instead of five pasted copies.

# Odd types kept as features.
odd_types <- c("odd1", "odd2", "oddX", "YES", "NO", "over", "under")

# Run PCA and classical MDS on the final odds of one bookmaker.
#
# Reads the global tables `odds_ordered_final` and `matches`, prints the
# PCA summary and loadings, and draws a 2 x 2 panel:
#   PCA coloured by over/under, PCA coloured by match result,
#   MDS (Euclidean) and MDS (Manhattan), both coloured by over/under.
#
# bookmaker_name: value of the `bookmaker` column to analyse.
# Returns (invisibly) a list with the PCA fit and both MDS embeddings.
analyse_bookmaker <- function(bookmaker_name) {
  # Keep the selected odd types; handicap markets only at lines 2.5 / 0.5.
  bookmaker_odds <- odds_ordered_final[
    bookmaker == bookmaker_name &
      (is.na(totalhandicap) | totalhandicap == 2.5 | totalhandicap == 0.5) &
      (oddtype %in% odd_types)
  ]

  # One row per match, one column per oddtype_bookmaker_handicap.
  bookmaker_wide <- dcast(
    bookmaker_odds,
    matchId ~ oddtype + bookmaker + totalhandicap,
    value.var = "final_odd"
  )

  merged_matches <- merge(matches, bookmaker_wide, by = "matchId")
  merged_matches <- merged_matches[complete.cases(merged_matches)]

  is_over <- as.numeric(merged_matches$IsOver)
  outcome <- merged_matches$Result

  # Select the odds columns by name. The earlier version dropped the first
  # 12 columns by position, which is only right while `matches` has exactly
  # 12 columns -- TODO confirm no column of `matches` shares a name with an
  # odds column.
  feature_cols <- setdiff(names(bookmaker_wide), "matchId")
  odds <- as.matrix(merged_matches[, feature_cols, with = FALSE])

  # scaling: one global min-max over all odds (not per column)
  odds_range <- range(odds)
  odds <- (odds - odds_range[1]) / (odds_range[2] - odds_range[1])

  pca <- princomp(odds)
  print(summary(pca))
  print(pca$loadings)

  mds_data <- cmdscale(dist(odds, method = "euclidean"))
  mds_data_manhattan <- cmdscale(dist(odds, method = "manhattan"))

  # Restore the caller's layout once the four panels are drawn.
  old_par <- par(mfrow = c(2, 2))
  on.exit(par(old_par), add = TRUE)

  plot(
    pca$scores[, 1], pca$scores[, 2],
    main = paste("PCA for", bookmaker_name),
    col = is_over + 1, pch = ".", cex = 6, xlab = "Comp1", ylab = "Comp2"
  )
  legend(
    "topleft", legend = c("Under 2.5", "Over 2.5"),
    col = c(1, 2), pch = ".", pt.cex = 6
  )

  plot(
    pca$scores[, 1], pca$scores[, 2],
    main = paste("PCA for", bookmaker_name),
    col = outcome + 1, pch = ".", cex = 6, xlab = "Comp1", ylab = "Comp2"
  )
  legend(
    "topleft", legend = c("Home", "Away", "Tie"),
    col = c(1, 2, 3), pch = ".", pt.cex = 6
  )

  plot(
    mds_data[, 1], mds_data[, 2],
    main = paste("MDS with Euclidean Dist. for", bookmaker_name),
    xlab = "", ylab = "", col = is_over + 1, pch = ".", cex = 7
  )
  legend(
    "bottomleft", legend = c("Under 2.5", "Over 2.5"),
    col = c(1, 2), pch = ".", pt.cex = 6
  )

  plot(
    mds_data_manhattan[, 1], mds_data_manhattan[, 2],
    main = paste("MDS with Manhattan Dist. for", bookmaker_name),
    xlab = "", cex.main = 1, ylab = "",
    col = is_over + 1, pch = ".", cex = 6
  )
  legend(
    "bottomleft", legend = c("Under 2.5", "Over 2.5"),
    col = c(1, 2), pch = ".", pt.cex = 6
  )

  invisible(list(
    pca = pca,
    mds_euclidean = mds_data,
    mds_manhattan = mds_data_manhattan
  ))
}

bookmaker_names <- c("Pinnacle", "Betway", "Unibet", "bwin", "Betsafe")
bookmaker_results <- lapply(
  setNames(bookmaker_names, bookmaker_names),
  analyse_bookmaker
)

task3

# Draw an image (matrix or RGB array) on an empty plot at pixel scale.
# The x extent is the number of columns and the y extent the number of
# rows, so non-square images are not distorted.
show_image <- function(img) {
  plot(NA, xlim = c(0, ncol(img)), ylim = c(0, nrow(img)))
  rasterImage(img, 0, 0, ncol(img), nrow(img), interpolate = TRUE)
}

# Draw the three channels of an RGB array side by side, each with a
# 256-step ramp in its own colour.
show_channels <- function(img) {
  ramp <- (0:255) / 255
  par(mfrow = c(1, 3))
  image(img[, , 1], col = rgb(ramp, 0, 0), useRaster = TRUE, axes = FALSE)
  image(img[, , 2], col = rgb(0, ramp, 0), useRaster = TRUE, axes = FALSE)
  image(img[, , 3], col = rgb(0, 0, ramp), useRaster = TRUE, axes = FALSE)
}

homework_image <- readJPEG("image512.jpg")

# display image
par(mfrow = c(1, 1))
show_image(homework_image)

# display each channel
show_channels(homework_image)

# add noise
hw_image_initial <- homework_image

# One uniform(0, 0.1) draw per pixel and channel, added in a single
# vectorised step; this works for any image size, not only 512 x 512.
homework_image <- homework_image + runif(length(homework_image), 0, 0.1)

# scaling: bring the maximum intensity back to 1
homework_image <- homework_image / max(homework_image)

par(mfrow = c(1, 1))
show_image(homework_image)

# displaying each channel of the noised image
show_channels(homework_image)

# plotting the gray image (the first channel of the noised image)
par(mfrow = c(1, 1))
gray_image <- homework_image[, , 1]
show_image(gray_image)

# dividing into patches
# Every 3 x 3 neighbourhood of the gray image becomes ONE row of 9 values.
# The PCA summary printed in this report lists only 3 components, which
# shows that the earlier list + as.data.table() conversion did not flatten
# the patches; building the matrix directly fixes that. The printed PCA
# output must be regenerated after this change.
d <- as.matrix(gray_image)
n_patch_rows <- nrow(d) - 2
n_patch_cols <- ncol(d) - 2

# Rows follow the original patch order: top-left corner moves along a
# row of the image first, then down. Columns follow the column-major
# order of a 3 x 3 patch, so matrix(loading, 3, 3) restores its layout.
r <- matrix(NA_real_, nrow = n_patch_rows * n_patch_cols, ncol = 9)
patch_cell <- 1
for (col_offset in 0:2) {
  for (row_offset in 0:2) {
    shifted <- d[
      (1 + row_offset):(n_patch_rows + row_offset),
      (1 + col_offset):(n_patch_cols + col_offset)
    ]
    # t() makes the image column index vary fastest, matching the row
    # order described above.
    r[, patch_cell] <- as.vector(t(shifted))
    patch_cell <- patch_cell + 1
  }
}

# pca
pca_image <- princomp(r)
summary(pca_image)
pca_image$loadings

# component1, component2, component3
# Show the scores of a principal component as an image.
par(mfrow = c(1, 1))

# Size of the grid of 3 x 3 patches taken from the gray image.
patch_rows <- nrow(gray_image) - 2
patch_cols <- ncol(gray_image) - 2

# Rescale the scores of component k to [0, 1] and draw them as an image.
# The reshaped matrix is kept in its own variable: writing it back into a
# column of pca_image$scores would flatten it to a vector again, and
# rasterImage() would then not receive a 2-D image.
show_component <- function(k) {
  score <- pca_image$scores[, k]
  score <- (score - min(score)) / (max(score) - min(score))
  # Patches were collected row by row, so fill the image by row.
  component_image <- matrix(
    score,
    nrow = patch_rows, ncol = patch_cols, byrow = TRUE
  )
  plot(NA, xlim = c(0, 1), ylim = c(0, 1))
  rasterImage(component_image, 0, 0, 1, 1, interpolate = TRUE)
  invisible(component_image)
}

for (k in 1:3) {
  show_component(k)
}

# Loadings of the first three components, each drawn as a 3 x 3 image.

# Min-max rescale a matrix to the [0, 1] range.
unit_scale <- function(m) {
  lowest <- min(m)
  (m - lowest) / (max(m) - lowest)
}

# Draw a small matrix stretched over the unit square.
draw_patch <- function(m) {
  plot(NA, xlim = c(0, 1), ylim = c(0, 1))
  rasterImage(m, 0, 0, 1, 1, interpolate = TRUE)
}

# loading1
loading1 <- unit_scale(matrix(pca_image$loadings[, 1], 3, 3))
draw_patch(loading1)

# loading2
loading2 <- unit_scale(matrix(pca_image$loadings[, 2], 3, 3))
draw_patch(loading2)

# loading3
loading3 <- unit_scale(matrix(pca_image$loadings[, 3], 3, 3))
draw_patch(loading3)